import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Load the seeds dataset (CSV hosted in the MicrosoftDocs GitHub repository).
url = (
    "https://raw.githubusercontent.com/MicrosoftDocs/"
    "mslearn-introduction-to-machine-learning/main/Data/ml-basics/seeds.csv"
)
df = pd.read_csv(url)

# Print the first 10 rows as a quick preview of the data.
print(df.head(n=10))

# Visualise how the rows are split across seed species as a pie chart.
species_count = df['species'].value_counts()

plt.figure(figsize=(6, 6))
plt.pie(
    species_count,
    labels=species_count.index,
    colors=['#ff9999', '#66b3ff', '#99ff99'],
    startangle=90,
    autopct='%1.1f%%',
)
plt.title('Distribution of Seed Species')
plt.axis('equal')  # keep the pie circular
plt.show()

# Box plots of area, kernel_length and groove_length, each broken down by
# seed species, drawn side by side in a single 1x3 figure.
plt.figure(figsize=(12, 8))

boxplot_panels = [
    ('area', 'Area Distribution by Seed Species'),
    ('kernel_length', 'Kernel Length Distribution by Seed Species'),
    ('groove_length', 'Groove Length Distribution by Seed Species'),
]

# Subplot indices are 1-based, so enumerate from 1.
for panel_idx, (feature, panel_title) in enumerate(boxplot_panels, start=1):
    plt.subplot(1, 3, panel_idx)
    sns.boxplot(x='species', y=feature, data=df)
    plt.title(panel_title)

plt.tight_layout()
plt.show()